# Field-name template repeated verbatim by many entries below: the record
# field "conversations", plus "value"/"from"/"human" for the per-message keys.
# It is unpacked (copied) into each entry, so no two entries share a dict.
_FROM_VALUE_FIELDS = {
    "convo_key": "conversations",
    "message_key": "value",
    "from_key": "from",
    "human_value": "human",
}

# Per-message template used by the two lmsys entries; their "convo_key"
# differs, so it is spelled out on each entry instead of living here.
_ROLE_CONTENT_FIELDS = {
    "message_key": "content",
    "from_key": "role",
    "human_value": "user",
}

# Registry of source datasets, keyed by a human-readable dataset name.
#
# Each entry says where the data lives ("path", or "url" for CogStack, with an
# optional "data_files") and, optionally, which field names to read from it.
# NOTE(review): the exact meaning of "convo_key" / "message_key" / "from_key" /
# "human_value" is defined by whatever code consumes this mapping, which is not
# visible here -- presumably they name the conversation field, the message-text
# field, the speaker field and the speaker value that marks a human turn.
# Entries that give only "path" supply no field names at all.
SOURCE_DATASET_INFO = {
    "Airoboros 2.2": {
        "path": "jondurbin/airoboros-2.2",
        "convo_key": "instruction",
        "message_key": "response",
    },
    "CamelAI Biology": {"path": "camel-ai/biology"},
    "CamelAI Chemistry": {"path": "camel-ai/chemistry"},
    "CamelAI Math": {"path": "camel-ai/math"},
    "CamelAI Physics": {"path": "camel-ai/physics"},
    "Chatbot Arena": {
        "path": "lmsys/chatbot_arena_conversations",
        # Only the "conversation_b" field is referenced for this dataset.
        "convo_key": "conversation_b",
        **_ROLE_CONTENT_FIELDS,
    },
    "lmsys-1m": {
        "path": "lmsys/lmsys-chat-1m",
        "convo_key": "conversation",
        **_ROLE_CONTENT_FIELDS,
    },
    "Collective Cognition": {
        "path": "CollectiveCognition/chats-data-2023-09-22",
        **_FROM_VALUE_FIELDS,
    },
    # NOTE(review): this entry pairs "instruction"/"output" (which look like flat
    # columns, as in "Airoboros 2.2") with "from_key"/"human_value" -- confirm
    # the consumer really needs the last two here.
    "Evol Instruct 70K": {
        "path": "WizardLM/WizardLM_evol_instruct_70k",
        "convo_key": "instruction",
        "message_key": "output",
        "from_key": "from",
        "human_value": "human",
    },
    # NOTE(review): the display name says 140K while the path says 196k --
    # verify this is the intended repository.
    "Evol Instruct 140K": {
        "path": "WizardLM/WizardLM_evol_instruct_V2_196k",
        **_FROM_VALUE_FIELDS,
    },
    "Glaive Code Assistant": {
        "path": "glaiveai/glaive-code-assistant",
        **_FROM_VALUE_FIELDS,
    },
    "GPT4-LLM": {"path": "teknium/GPT4-LLM-Cleaned"},
    "GPTeacher": {"path": "teknium/GPTeacher-General-Instruct"},
    "MetaMath 40k": {"path": "meta-math/MetaMathQA"},
    "SlimOrca 550K": {
        "path": "Open-Orca/SlimOrca",
        **_FROM_VALUE_FIELDS,
    },
    "Platypus": {"path": "garage-bAInd/Open-Platypus"},
    "ShareGPT": {
        "path": "anon8231489123/ShareGPT_Vicuna_unfiltered",
        "data_files": "ShareGPT_V3_unfiltered_cleaned_split.json",
        **_FROM_VALUE_FIELDS,
    },
    # The only entry located by "url" rather than "path".
    "CogStack": {
        "url": "https://raw.githubusercontent.com/CogStack/OpenGPT/refs/heads/main/data/medical_tasks_gpt4/prepared_generated_data_for_medical_tasks.csv",
        **_FROM_VALUE_FIELDS,
    },
    "CoT Alpaca": {
        "path": "causal-lm/cot_alpaca_gpt4",
        **_FROM_VALUE_FIELDS,
    },
    "Unnatural Instructions": {
        "path": "mrm8488/unnatural-instructions-full",
        "convo_key": "instances",
    },
    "caseus_custom": {
        "path": "Locutusque/caseus_custom",
        **_FROM_VALUE_FIELDS,
    },
    "dataforge_economics": {
        "path": "teknium/dataforge-economics",
        **_FROM_VALUE_FIELDS,
    },
}
